#front matter
rm(list=ls())
library(foreign)
library(Hmisc)
library(timeSeries)
library(lmtest)

#set working directory
#setwd("/Volumes/MONOGAN/DISSERTATION/partisanship/data/")


##### TEXAS ######
###Define Data Management Functions for Texas Files###
#Create a function for combining all data:
#Build the respondent-level party indicators for one survey wave.
#
#Arguments:
#  data       name (character string) of the survey data frame, looked up from the
#             caller's environment; a data frame may also be passed directly
#  y, q       year and quarter of the wave, repeated for every respondent
#  party.var  name of the party-identification column in the survey
#  rep.value  code of party.var counted as Republican
#  dem.value  code of party.var counted as Democrat
#
#Returns a numeric matrix with one row per respondent and the columns
#republican, democrat, year, quarter, joint (joint = year*100 + quarter).
#Party codes above 3 are treated as missing, so both indicators are NA for them.
#
#The column is read straight from the data frame instead of via attach()/get():
#that way an object with the same name in the workspace cannot mask the survey
#column, and nothing is left on the search path if the function stops early.
accumulate<-function(data, y, q, party.var, rep.value, dem.value){
  survey<-if(is.data.frame(data)) data else get(data, envir=parent.frame())
  if(!(party.var %in% names(survey))){
    stop("column '", party.var, "' not found in the survey data", call.=FALSE)
  }
  a<-survey[[party.var]]
  a[a>3]<-NA
  republican<-as.numeric(a==rep.value)
  democrat<-as.numeric(a==dem.value)
  b<-length(republican)
  year<-rep(y,b)
  quarter<-rep(q,b)
  joint<-quarter+(year*100)
  cbind(republican, democrat, year, quarter, joint)
}

#Create a function for combining data contingent on race:
#Build the respondent-level party indicators for one racial/ethnic group of one
#survey wave.
#
#Arguments:
#  data        name (character string) of the survey data frame, looked up from the
#              caller's environment; a data frame may also be passed directly
#  y, q        year and quarter of the wave, repeated for every retained respondent
#  party.var   name of the party-identification column
#  rep.value   code of party.var counted as Republican
#  dem.value   code of party.var counted as Democrat
#  race.var    name of the race/ethnicity column
#  race.value  code of race.var identifying the group to keep
#
#Returns a numeric matrix with one row per retained respondent and the columns
#republican, democrat, year, quarter, joint (joint = year*100 + quarter).
#Party codes above 3 are treated as missing.
#
#Columns are read directly from the data frame. The earlier attach()/subset()
#version could pick up a workspace object, or a survey column that happened to
#be called 'e' or 'race.value', instead of the intended race variable.
race.subsetting<-function(data, y, q, party.var, rep.value, dem.value, race.var, race.value){
  survey<-if(is.data.frame(data)) data else get(data, envir=parent.frame())
  absent<-setdiff(c(party.var, race.var), names(survey))
  if(length(absent)>0){
    stop("column(s) not found in the survey data: ", paste(absent, collapse=", "), call.=FALSE)
  }
  race<-survey[[race.var]]
  #respondents with a missing race code are dropped, exactly as subset() would do
  keep<-!is.na(race) & race==race.value
  a<-survey[[party.var]][keep]
  a[a>3]<-NA
  republican<-as.numeric(a==rep.value)
  democrat<-as.numeric(a==dem.value)
  b<-length(republican)
  year<-rep(y,b)
  quarter<-rep(q,b)
  joint<-quarter+(year*100)
  cbind(republican, democrat, year, quarter, joint)
}

#DUPLICATE FUNCTIONS TO HANDLE TWO VALUES FOR REPUBLICAN
#Variant of accumulate() for waves where two codes each count as Republican and
#as Democrat.
#
#Arguments:
#  data                      name (character string) of the survey data frame, looked
#                            up from the caller's environment; a data frame may also
#                            be passed directly
#  y, q                      year and quarter of the wave, repeated for every respondent
#  party.var                 name of the party-identification column
#  rep.value.1, rep.value.2  the two codes counted as Republican
#  dem.value.1, dem.value.2  the two codes counted as Democrat
#
#Returns a numeric matrix with one row per respondent and the columns
#republican, democrat, year, quarter, joint (joint = year*100 + quarter).
#Party codes above 7 are treated as missing.
#
#The column is read straight from the data frame instead of via attach()/get(),
#so a same-named workspace object cannot mask it and nothing stays attached if
#the function stops early.
accumulate.2<-function(data, y, q, party.var, rep.value.1, rep.value.2, dem.value.1, dem.value.2){
  survey<-if(is.data.frame(data)) data else get(data, envir=parent.frame())
  if(!(party.var %in% names(survey))){
    stop("column '", party.var, "' not found in the survey data", call.=FALSE)
  }
  a<-survey[[party.var]]
  a[a>7]<-NA
  republican<-as.numeric(a==rep.value.1 | a==rep.value.2)
  democrat<-as.numeric(a==dem.value.1 | a==dem.value.2)
  b<-length(republican)
  year<-rep(y,b)
  quarter<-rep(q,b)
  joint<-quarter+(year*100)
  cbind(republican, democrat, year, quarter, joint)
}

#Variant of race.subsetting() for waves where two codes each count as Republican
#and as Democrat.
#
#Arguments:
#  data                      name (character string) of the survey data frame, looked
#                            up from the caller's environment; a data frame may also
#                            be passed directly
#  y, q                      year and quarter of the wave, repeated for every retained
#                            respondent
#  party.var                 name of the party-identification column
#  rep.value.1, rep.value.2  the two codes counted as Republican
#  dem.value.1, dem.value.2  the two codes counted as Democrat
#  race.var                  name of the race/ethnicity column
#  race.value                code of race.var identifying the group to keep
#
#Returns a numeric matrix with one row per retained respondent and the columns
#republican, democrat, year, quarter, joint (joint = year*100 + quarter).
#Party codes above 7 are treated as missing.
#
#Columns are read directly from the data frame. The earlier attach()/subset()
#version could pick up a workspace object, or a survey column that happened to
#be called 'e' or 'race.value', instead of the intended race variable.
race.subsetting.2<-function(data, y, q, party.var, rep.value.1, rep.value.2, dem.value.1, dem.value.2, race.var, race.value){
  survey<-if(is.data.frame(data)) data else get(data, envir=parent.frame())
  absent<-setdiff(c(party.var, race.var), names(survey))
  if(length(absent)>0){
    stop("column(s) not found in the survey data: ", paste(absent, collapse=", "), call.=FALSE)
  }
  race<-survey[[race.var]]
  #respondents with a missing race code are dropped, exactly as subset() would do
  keep<-!is.na(race) & race==race.value
  a<-survey[[party.var]][keep]
  a[a>7]<-NA
  republican<-as.numeric(a==rep.value.1 | a==rep.value.2)
  democrat<-as.numeric(a==dem.value.1 | a==dem.value.2)
  b<-length(republican)
  year<-rep(y,b)
  quarter<-rep(q,b)
  joint<-quarter+(year*100)
  cbind(republican, democrat, year, quarter, joint)
}
	
###Input Texas data files (4 years before, and 4 years after)###
#Each entry maps the name of the data frame to create onto the file it is read from.
#list2env() then creates all of them in the current environment in one step.

#1990-1993 waves: Stata files, read without turning labelled values into factors
invisible(list2env(lapply(c(
  winter.1990="texasPoll/winter90.dta",
  spring.1990="texasPoll/spring90.dta",
  summer.1990="texasPoll/summer90.dta",
  fall.1990="texasPoll/fall90.dta",
  winter.1991="texasPoll/winter91.dta",
  spring.1991="texasPoll/spring91.dta",
  summer.1991="texasPoll/summer91.dta",
  fall.1991="texasPoll/fall91.dta",
  winter.1992="texasPoll/winter92.dta",
  spring.1992="texasPoll/spring92.dta",
  summer.1992="texasPoll/summer92.dta",
  fall.1992="texasPoll/fall92.dta",
  winter.1993="texasPoll/winter93.dta",
  spring.1993="texasPoll/spring93.dta",
  summer.1993="texasPoll/summer93.dta",
  fall.1993="texasPoll/fall93.dta"
), read.dta, convert.factors=FALSE), envir=environment()))

#1994-1998 waves: SPSS files, read as data frames with numeric codes instead of value labels
invisible(list2env(lapply(c(
  winter.1994="texasPoll/Winter94.sav",
  spring.1994="texasPoll/Spring94.sav",
  summer.1994="texasPoll/Summer94.sav",
  fall.1994="texasPoll/Fall94.sav",
  february.1995="texasPoll/Feb95.sav",
  april.1995="texasPoll/April95.sav",
  august.1995="texasPoll/aug95.sav",
  october.1995="texasPoll/oct95.sav",
  february.1996="texasPoll/Feb96.sav",
  june.One.1996="texasPoll/June96one.sav",
  june.Two.1996="texasPoll/June96two.sav",
  september.1996="texasPoll/Sep96.sav",
  october.1996="texasPoll/Oct96.sav",
  february.1997="texasPoll/Feb97.sav",
  april.1997="texasPoll/Apr97.sav",
  august.1997="texasPoll/Aug97.sav",
  october.1997="texasPoll/oct97.sav",
  february.1998="texasPoll/Feb98.sav",
  june.1998="texasPoll/June98.sav",
  august.1998="texasPoll/Aug98.sav",
  october.1998="texasPoll/Oct98.sav"
), read.spss, to.data.frame=TRUE, use.value.labels=FALSE), envir=environment()))

###Process Texas Data###
#The 37 waves are described once, in stacking order (slots 25 to 61), and the
#pooled (d.*), white (w.*) and Hispanic (h.*) matrices are generated from that
#description. Everything is built inside an anonymous function so that only the
#d.25...d.61, w.25...w.61 and h.25...h.61 objects are created in the calling
#environment.
invisible((function(target){
  slot<-25:61
  wave<-c(
    "winter.1990", "spring.1990", "summer.1990", "fall.1990",
    "winter.1991", "spring.1991", "summer.1991", "fall.1991",
    "winter.1992", "spring.1992", "summer.1992", "fall.1992",
    "winter.1993", "spring.1993", "summer.1993", "fall.1993",
    "winter.1994", "spring.1994", "summer.1994", "fall.1994",
    "february.1995", "april.1995", "august.1995", "october.1995",
    "february.1996", "june.One.1996", "june.Two.1996", "september.1996", "october.1996",
    "february.1997", "april.1997", "august.1997", "october.1997",
    "february.1998", "june.1998", "august.1998", "october.1998")
  #1996 has five waves; the two June files are both assigned to quarter 2
  yr<-as.numeric(rep(1990:1998, times=c(4,4,4,4,4,4,5,4,4)))
  qtr<-c(rep(1:4, times=6), 1, 2, 2, 3, 4, rep(1:4, times=2))
  #name of the party-identification column in each wave
  pid.var<-rep(c("rpartyid", "d9", "d009"), times=c(16, 2, 19))
  #name of the race/ethnicity column in each wave
  race.col<-rep(c("ethnic", "d8", "d008", "dh03"), times=c(16, 2, 9, 10))
  #code used for Hispanic respondents: 3 through slot 51, 2 from slot 52 on (the dh03 waves)
  hisp.code<-rep(c(3, 2), times=c(27, 10))
  #waves processed with the two-code functions (Republican = 1 or 2, Democrat = 6 or 7)
  two.code<-slot %in% c(25, 41, 42)

  pooled<-function(i){
    if(two.code[i]){
      accumulate.2(wave[i], yr[i], qtr[i], pid.var[i], 1, 2, 6, 7)
    } else {
      accumulate(wave[i], yr[i], qtr[i], pid.var[i], 1, 2)
    }
  }
  by.race<-function(i, code){
    if(two.code[i]){
      race.subsetting.2(wave[i], yr[i], qtr[i], pid.var[i], 1, 2, 6, 7, race.col[i], code)
    } else {
      race.subsetting(wave[i], yr[i], qtr[i], pid.var[i], 1, 2, race.col[i], code)
    }
  }

  #All respondents
  overall<-lapply(seq_along(slot), pooled)
  names(overall)<-paste0("d.", slot)
  #Create white subsets (race code 1 in every wave)
  white<-lapply(seq_along(slot), by.race, code=1)
  names(white)<-paste0("w.", slot)
  #Create hispanic subsets
  hispanic<-lapply(seq_along(slot), function(i) by.race(i, hisp.code[i]))
  names(hispanic)<-paste0("h.", slot)

  list2env(c(overall, white, hispanic), envir=target)
})(environment()))

#Combine mini-datasets into a single data frame
#Stack the per-wave matrices (slots 25 to 61) row-wise, one data frame per group.
#The objects are fetched by name and passed to rbind() unnamed, in slot order.
tx.overall<-as.data.frame(do.call(rbind, unname(mget(paste0("d.", 25:61), envir=environment()))))
tx.white<-as.data.frame(do.call(rbind, unname(mget(paste0("w.", 25:61), envir=environment()))))
tx.hispanic<-as.data.frame(do.call(rbind, unname(mget(paste0("h.", 25:61), envir=environment()))))

#Aggregate data frames
#Mean of every column within each year-quarter (joint), ignoring missing values.
#aggregate() puts the grouping key in the first column; it is dropped because
#the averaged 'joint' column already carries the same value.
total.quarter.texas<-aggregate(x=tx.overall, by=list(tx.overall[["joint"]]), FUN=function(v) mean(v, na.rm=TRUE))[-1]
hispanic.quarter.texas<-aggregate(x=tx.hispanic, by=list(tx.hispanic[["joint"]]), FUN=function(v) mean(v, na.rm=TRUE))[-1]
white.quarter.texas<-aggregate(x=tx.white, by=list(tx.white[["joint"]]), FUN=function(v) mean(v, na.rm=TRUE))[-1]

#Combine data
#The first two columns of each aggregate are the Republican and Democratic shares
colnames(total.quarter.texas)[1:2]<-c('txRep','txDem')
colnames(hispanic.quarter.texas)[1:2]<-c('txHispRep','txHispDem')
colnames(white.quarter.texas)[1:2]<-c('txWhiteRep','txWhiteDem')

#Express the shares as percentages
total.quarter.texas[,c('txRep','txDem')]<-total.quarter.texas[,c('txRep','txDem')]*100
hispanic.quarter.texas[,c('txHispRep','txHispDem')]<-hispanic.quarter.texas[,c('txHispRep','txHispDem')]*100
white.quarter.texas[,c('txWhiteRep','txWhiteDem')]<-white.quarter.texas[,c('txWhiteRep','txWhiteDem')]*100

#Macropartisanship: Democrats as a percentage of all Democratic and Republican identifiers
total.quarter.texas$txMacro<-100*(total.quarter.texas[['txDem']]/(total.quarter.texas[['txDem']]+total.quarter.texas[['txRep']]))
hispanic.quarter.texas$txHispMacro<-100*(hispanic.quarter.texas[['txHispDem']]/(hispanic.quarter.texas[['txHispDem']]+hispanic.quarter.texas[['txHispRep']]))
white.quarter.texas$txWhiteMacro<-100*(white.quarter.texas[['txWhiteDem']]/(white.quarter.texas[['txWhiteDem']]+white.quarter.texas[['txWhiteRep']]))

#The three tables are pasted side by side, which is only valid when they list the
#same quarters in the same order. Stop rather than silently pair up different
#quarters (e.g. if one group has no respondents in some wave).
if(!isTRUE(all.equal(total.quarter.texas[['joint']], hispanic.quarter.texas[['joint']])) ||
   !isTRUE(all.equal(total.quarter.texas[['joint']], white.quarter.texas[['joint']]))){
  stop("Texas overall, Hispanic and white aggregates do not cover the same quarters", call.=FALSE)
}

#Columns are selected by name: identifiers first, then overall, Hispanic and white series
texas.all<-cbind(
  total.quarter.texas[,c('year','quarter','joint','txRep','txDem','txMacro')],
  hispanic.quarter.texas[,c('txHispRep','txHispDem','txHispMacro')],
  white.quarter.texas[,c('txWhiteRep','txWhiteDem','txWhiteMacro')])

#Create data file for Texas
#row.names is spelled out as FALSE: the shorthand F is an ordinary variable that can be reassigned
write.csv(texas.all, file="tx9098.csv", row.names=FALSE)

##### UNITED STATES ######
#Clear Texas out: removes every object that ls() reports in the current environment,
#which includes the Texas data frames and the four Texas helper functions defined above.
#Loaded packages are not affected.
rm(list=ls())

###Load Partisanship Data###
#Poll-level macropartisanship readings; the IntMethod column is dropped before averaging
party<-read.csv('macropartisanship.csv', header=TRUE)
party<-party[, names(party)!="IntMethod", drop=FALSE]
#year-quarter key, e.g. 1990 quarter 2 becomes 199002
party$id<-party$quarter+100*party$year
#substitute mean sample size where it is missing
party$SampleSize<-replace(party$SampleSize, is.na(party$SampleSize), 1240)

#Collapse to one row per quarter. aggregate() gives plain column means; the
#sample-size-weighted quarterly mean is computed separately from the poll-level
#rows. Both results are ordered by the sorted year-quarter key, so they line up.
party.2<-aggregate(party, by=list(party$id), FUN=mean)
party.2$macropartisanship.q<-unname(vapply(
  split(party, party$id),
  function(x) weighted.mean(x$macropartisanship, x$SampleSize),
  numeric(1)))
#the grouping key, the averaged month and the id helper are no longer needed
party.2<-party.2[, !(names(party.2) %in% c("Group.1","month","id")), drop=FALSE]

###Load Consumer Sentiment Data###
#Consumer sentiment is used as read; no quarterly collapsing is done here
consumer<-read.csv('consumerSentiment.csv')
#consumer$id<-100*consumer$Year+consumer$Quarter

###Load Presidential Approval Data###
approval<-read.csv('presApprovalGallup.csv')
#year-quarter key, e.g. 1990 quarter 2 becomes 199002
approval$id<-approval$quarter+100*approval$year

#Collapse to one row per quarter: plain column means from aggregate(), plus an
#approval mean weighted by each poll's sample size. Both are ordered by the
#sorted year-quarter key, so they line up.
approval.2<-aggregate(approval, by=list(approval$id), FUN=mean)
approval.2$approve.q<-unname(vapply(
  split(approval, approval$id),
  function(x) weighted.mean(x$approve, x$sample),
  numeric(1)))
#the grouping key, the averaged month and the id helper are no longer needed
approval.2<-approval.2[, !(names(approval.2) %in% c("Group.1","month","id")), drop=FALSE]

###Merge Three Data Sets###
#Consumer sentiment defines the set of quarters: all.y=TRUE keeps every sentiment
#row even when partisanship (first merge) or approval (second merge) has no match.
merge.1<-merge(
  x=party.2, y=consumer,
  by.x=c('year','quarter'), by.y=c('Year','Quarter'),
  all.y=TRUE)
full<-merge(
  x=approval.2, y=merge.1,
  by=c('year','quarter'),
  all.y=TRUE)

##What's the time frame? 1969-2010##
#which() drops rows with a missing year, as subset() does
us.short<-full[
  which(full$year>=1969 & full$year<=2010),
  c("year","quarter","approve.q","macropartisanship.q","Index","party","president"),
  drop=FALSE]

##Create TS union with lags##
#Builds the national quarterly series from us.short, adds one-quarter lags, and
#binds everything into us.time. Statement order matters: 'party' is the poll-level
#data frame until it is rebound to a ts object below.
#Party and president missing a couple of values for coding reasons. LVCF correctly codes in this case.
party.0<-interpNA(us.short$party,method='before')
president.0<-interpNA(us.short$president,method='before')
#Approval is missing in the 3rd quarter of 1972 & 1976.
approve.0<-interpNA(us.short$approve.q,method='linear')
#Approval and (below) sentiment are multiplied by the party series.
#NOTE(review): presumably party is coded +1/-1 so this signs the series by the president's party - confirm.
approve.q<-ts(approve.0*party.0)
macropartisanship.q<-ts(us.short$macropartisanship.q)
party<-ts(party.0)
president<-ts(president.0)
sentiment<-ts(us.short$Index*party.0)
year<-ts(us.short$year)
quarter<-ts(us.short$quarter)
#'political' is the part of approval not explained by sentiment: the residuals of this regression.
#NOTE(review): lm() drops rows with missing values, so the residual series is only aligned
#with the others if approve.q and sentiment have no NA - confirm.
pol.mod<-lm(approve.q~sentiment);summary(pol.mod)
political<-ts(pol.mod$residuals)
#lag(x,-1) shifts a series one period later, so each l.* series holds the previous quarter's value
l.approve<-lag(approve.q,-1)
l.macropartisanship<-lag(macropartisanship.q,-1)
l.sentiment<-lag(sentiment,-1)
l.political<-lag(political,-1)
id<-ts(us.short$year*100+us.short$quarter)
#ts.union() spans the combined time range, so the shifted lags add one trailing row.
#Rows 168 and 169 are removed by position and the columns are reordered so that
#year and quarter (columns 5 and 6 of the union) come first.
#NOTE(review): the hard-coded 168:169 assumes the 1969-2010 window yields exactly 168 quarters,
#and it drops the last observed quarter along with the padding row - confirm this is intended.
us.time<-ts.union(political,approve.q,macropartisanship.q,sentiment,year,quarter,l.political,l.approve,l.macropartisanship,l.sentiment,party,president,id)[-c(168:169),c(5,6,1:4,7:13)]

#save file
#row.names is spelled out as FALSE: the shorthand F is an ordinary variable that can be reassigned
write.csv(us.time, file="usMacropartisanship.csv", row.names=FALSE)

##### CALIFORNIA #####
#Respondent-level California file, read with numeric codes rather than value labels
ca.0<-read.spss("umembargo_cumulative.sav", to.data.frame=TRUE, use.value.labels=FALSE)
#codebook<-read.spss("umembargo_cumulative.sav", use.value.labels=TRUE, to.data.frame=TRUE)

#Identify which quarter each poll was fielded in; all.y=TRUE keeps respondents
#whose POLLNUM has no entry in the lookup table
ca.quarters<-read.csv('fieldQuarters.csv')
ca<-merge(x=ca.quarters, y=ca.0, by='POLLNUM', all.y=TRUE)
#year-quarter key, e.g. 1990 quarter 2 becomes 199002
ca$id<-ca$quarter+100*ca$YEAR

#Recode party identification into 0/100 dummies so that means read as percentages:
#code 9 is set to missing, code 3 counts as Democrat and code 1 as Republican
#table(california$PID3, california$YEAR)
is.na(ca$PID3)<-which(ca$PID3==9)
ca$dem<-100*as.numeric(ca$PID3==3)
ca$rep<-100*as.numeric(ca$PID3==1)

#Consolidate weights: where WEIGHT is missing fall back to PWEIGHT, then to OWEIGHT
ca$WEIGHT<-replace(ca$WEIGHT, is.na(ca$WEIGHT), ca$PWEIGHT[is.na(ca$WEIGHT)])
ca$WEIGHT<-replace(ca$WEIGHT, is.na(ca$WEIGHT), ca$OWEIGHT[is.na(ca$WEIGHT)])

#Create complete, Hispanic (RACE2 code 3), and white (RACE2 code 1) subsets.
#which() drops respondents with a missing RACE2, as subset() does.
california<-ca[, c("dem","rep","YEAR","quarter","id","WEIGHT")]
hispanic<-ca[which(ca$RACE2==3), c("dem","rep","YEAR","quarter","id","WEIGHT")]
white<-ca[which(ca$RACE2==1), c("dem","rep","YEAR","quarter","id","WEIGHT")]

#Build aggregate california datasets including weights
#Build the weighted quarterly tables for all respondents, Hispanics and whites
#with one shared recipe, then create ca.total.q, ca.hispanic.q and ca.white.q
#in the current environment.
invisible(list2env(lapply(
  list(ca.total.q=california, ca.hispanic.q=hispanic, ca.white.q=white),
  function(respondents){
    #weighted mean of every column within each year-quarter; column 6 (WEIGHT) supplies the weights
    #NOTE(review): weighted.mean(na.rm=TRUE) only removes missing values of the averaged column,
    #so a respondent with a missing weight presumably makes that quarter's means NA - confirm
    cells<-by(
      data=respondents,
      INDICES=respondents$id,
      FUN=function(d) apply(d, 2, weighted.mean, w=d[,6], na.rm=TRUE),
      simplify=TRUE)
    quarterly<-as.data.frame(do.call("rbind", cells))
    #cleaning: keep quarters from 1969 on whose Democratic share is not NaN
    #(the same filter for all three tables keeps them comparable), and drop the helper columns
    keep<-which(!is.nan(quarterly$dem) & quarterly$YEAR>=1969)
    quarterly[keep, !(names(quarterly) %in% c("WEIGHT","id")), drop=FALSE]
  }), envir=environment()))

#append new weighted data
#The update file holds, per quarter, sample counts (samp*) and counts of Republican (rep*)
#and Democratic (dem*) identifiers for all respondents, Hispanics and whites.
#Each group's counts are turned into percentage shares with the same four columns
#as the quarterly tables, then appended below the survey-based rows.
ca.updates<-read.csv('WEIGHTED.UCDATA.csv')

total.up<-with(ca.updates, data.frame(
  dem=100*demAll/sampAll,
  rep=100*repAll/sampAll,
  YEAR=YEAR,
  quarter=quarter))
ca.total.q<-rbind(ca.total.q, total.up)

hispanic.up<-with(ca.updates, data.frame(
  dem=100*demHisp/sampHisp,
  rep=100*repHisp/sampHisp,
  YEAR=YEAR,
  quarter=quarter))
ca.hispanic.q<-rbind(ca.hispanic.q, hispanic.up)

white.up<-with(ca.updates, data.frame(
  dem=100*demWhite/sampWhite,
  rep=100*repWhite/sampWhite,
  YEAR=YEAR,
  quarter=quarter))
ca.white.q<-rbind(ca.white.q, white.up)

#merge with national data
#The national series defines the set of quarters: all.x=TRUE keeps every us.short row
#and leaves the California columns missing where a quarter has no California data
full.ca<-merge(
  x=us.short, y=ca.total.q,
  by.x=c('year','quarter'), by.y=c('YEAR','quarter'),
  all.x=TRUE)
full.ca.hisp<-merge(
  x=us.short, y=ca.hispanic.q,
  by.x=c('year','quarter'), by.y=c('YEAR','quarter'),
  all.x=TRUE)
full.ca.white<-merge(
  x=us.short, y=ca.white.q,
  by.x=c('year','quarter'), by.y=c('YEAR','quarter'),
  all.x=TRUE)

##Create TS union with lags##
#Builds the quarterly series for all California respondents from full.ca, adds
#one-quarter lags, and binds everything into ca.time. The national objects of the
#same names (party.0, approve.q, sentiment, political, ...) are overwritten here.
#Party and president missing a couple of values for coding reasons. LVCF correctly codes in this case.
party.0<-interpNA(full.ca$party,method='before')
president.0<-interpNA(full.ca$president,method='before')
#Approval is missing in the 3rd quarter of 1972 & 1976.
approve.0<-interpNA(full.ca$approve.q,method='linear')
#NOTE(review): presumably party is coded +1/-1 so this signs approval by the president's party - confirm.
approve.q<-ts(approve.0*party.0)
#California Dem & Rep have some missing
#Gaps are filled by linear interpolation before computing the Democratic share of two-party identifiers
dem.0<-interpNA(full.ca$dem,method='linear')
rep.0<-interpNA(full.ca$rep,method='linear')
caPartisanship<-100*dem.0/(dem.0+rep.0)
caPartisanship.q<-ts(caPartisanship)

#Remaining Series
party<-ts(party.0)
president<-ts(president.0)
sentiment<-ts(full.ca$Index*party.0)
year<-ts(full.ca$year)
quarter<-ts(full.ca$quarter)
#'political' is the part of approval not explained by sentiment: the residuals of this regression
pol.mod<-lm(approve.q~sentiment);summary(pol.mod)
political<-ts(pol.mod$residuals)
#lag(x,-1) shifts a series one period later, so each l.* series holds the previous quarter's value
l.approve<-lag(approve.q,-1)
l.caPartisanship<-lag(caPartisanship.q,-1)
l.sentiment<-lag(sentiment,-1)
l.political<-lag(political,-1)
id<-ts(full.ca$year*100+full.ca$quarter)
#ts.union() spans the combined time range, so the shifted lags add one trailing row.
#Rows 168 and 169 are removed by position; unlike the national file, the columns are not reordered.
#NOTE(review): the hard-coded 168:169 assumes full.ca has exactly 168 rows - confirm.
ca.time<-ts.union(political,approve.q,caPartisanship.q,sentiment,year,quarter,l.political,l.approve,l.caPartisanship,l.sentiment,party,president,id)[-c(168:169),]

#Repeat for Hispanic data
#Same construction as the series for all California respondents, with every input
#taken from full.ca.hisp and every object carrying a .hisp suffix.
#Party and president gaps are filled with the last earlier value; approval gaps by linear interpolation.
party.hisp.0<-interpNA(full.ca.hisp$party,method='before')
president.hisp.0<-interpNA(full.ca.hisp$president,method='before')
approve.hisp.0<-interpNA(full.ca.hisp$approve.q,method='linear')
#Approval is multiplied by the party series built from this same merge. This line
#previously reused party.0 from the all-respondent block, which is only equivalent
#while both merges return identical rows.
approve.hisp.q<-ts(approve.hisp.0*party.hisp.0)
#Democratic share of two-party identifiers, after filling gaps by linear interpolation
dem.hisp.0<-interpNA(full.ca.hisp$dem,method='linear')
rep.hisp.0<-interpNA(full.ca.hisp$rep,method='linear')
caPartisanship.hisp<-100*dem.hisp.0/(dem.hisp.0+rep.hisp.0)
caPartisanship.hisp.q<-ts(caPartisanship.hisp)
party.hisp<-ts(party.hisp.0)
president.hisp<-ts(president.hisp.0)
sentiment.hisp<-ts(full.ca.hisp$Index*party.hisp.0)
year.hisp<-ts(full.ca.hisp$year)
quarter.hisp<-ts(full.ca.hisp$quarter)
#'political' is the part of approval not explained by sentiment: the residuals of this regression
pol.mod.hisp<-lm(approve.hisp.q~sentiment.hisp);summary(pol.mod.hisp)
political.hisp<-ts(pol.mod.hisp$residuals)
#lag(x,-1) shifts a series one period later, so each l.* series holds the previous quarter's value
l.approve.hisp<-lag(approve.hisp.q,-1)
l.caPartisanship.hisp<-lag(caPartisanship.hisp.q,-1)
l.sentiment.hisp<-lag(sentiment.hisp,-1)
l.political.hisp<-lag(political.hisp,-1)
id.hisp<-ts(full.ca.hisp$year*100+full.ca.hisp$quarter)
#Rows 168 and 169 are removed by position.
#NOTE(review): the hard-coded 168:169 assumes full.ca.hisp has exactly 168 rows - confirm.
ca.time.hisp<-ts.union(political.hisp,approve.hisp.q,caPartisanship.hisp.q,sentiment.hisp,year.hisp,quarter.hisp,l.political.hisp,l.approve.hisp,l.caPartisanship.hisp,l.sentiment.hisp,party.hisp,president.hisp,id.hisp)[-c(168:169),]

#Repeat for white data
#Same construction as the series for all California respondents, with every input
#taken from full.ca.white and every object carrying a .white suffix.
#Party and president gaps are filled with the last earlier value; approval gaps by linear interpolation.
party.white.0<-interpNA(full.ca.white$party,method='before')
president.white.0<-interpNA(full.ca.white$president,method='before')
approve.white.0<-interpNA(full.ca.white$approve.q,method='linear')
#Approval is multiplied by the party series built from this same merge. This line
#previously reused party.0 from the all-respondent block, which is only equivalent
#while both merges return identical rows.
approve.white.q<-ts(approve.white.0*party.white.0)
#Democratic share of two-party identifiers, after filling gaps by linear interpolation
dem.white.0<-interpNA(full.ca.white$dem,method='linear')
rep.white.0<-interpNA(full.ca.white$rep,method='linear')
caPartisanship.white<-100*dem.white.0/(dem.white.0+rep.white.0)
caPartisanship.white.q<-ts(caPartisanship.white)
party.white<-ts(party.white.0)
president.white<-ts(president.white.0)
sentiment.white<-ts(full.ca.white$Index*party.white.0)
year.white<-ts(full.ca.white$year)
quarter.white<-ts(full.ca.white$quarter)
#'political' is the part of approval not explained by sentiment: the residuals of this regression
pol.mod.white<-lm(approve.white.q~sentiment.white);summary(pol.mod.white)
political.white<-ts(pol.mod.white$residuals)
#lag(x,-1) shifts a series one period later, so each l.* series holds the previous quarter's value
l.approve.white<-lag(approve.white.q,-1)
l.caPartisanship.white<-lag(caPartisanship.white.q,-1)
l.sentiment.white<-lag(sentiment.white,-1)
l.political.white<-lag(political.white,-1)
id.white<-ts(full.ca.white$year*100+full.ca.white$quarter)
#Rows 168 and 169 are removed by position.
#NOTE(review): the hard-coded 168:169 assumes full.ca.white has exactly 168 rows - confirm.
ca.time.white<-ts.union(political.white,approve.white.q,caPartisanship.white.q,sentiment.white,year.white,quarter.white,l.political.white,l.approve.white,l.caPartisanship.white,l.sentiment.white,party.white,president.white,id.white)[-c(168:169),]

#save files
#row.names is spelled out as FALSE: the shorthand F is an ordinary variable that can be reassigned
write.csv(ca.time, file="caTotalMacropartisanshipWeight.csv", row.names=FALSE)
write.csv(ca.time.hisp, file="caHispMacropartisanshipWeight.csv", row.names=FALSE)
write.csv(ca.time.white, file="caWhiteMacropartisanshipWeight.csv", row.names=FALSE)


